function [last_episode,all_episodes]=simulation_exp(alpha,beta,T,vh,vl,mu_l,sigma_l,grid_middle,tick,grid_size,a2)
%SIMULATION_EXP Run one Q-learning experiment of T episodes for AMM 1.
%
% AMM 1 learns which price to quote with a stateless Q-learning rule, while
% AMM 2 always quotes the fixed price a2.
%
% Inputs:
%   alpha       - learning rate used in the Q-value update.
%   beta        - decay rate of the experimentation probability,
%                 epsilon_t = exp(-beta*t).
%   T           - number of episodes (positive integer).
%   vh, vl      - the two possible asset values, drawn with equal probability.
%   mu_l        - mean of the trader's liquidity shock l.
%   sigma_l     - standard deviation of the trader's liquidity shock l.
%   grid_middle - price at the centre of the price grid.
%   tick        - distance between two adjacent grid prices.
%   grid_size   - number of grid prices on each side of grid_middle
%                 (the grid has 2*grid_size+1 prices).
%   a2          - price always quoted by AMM 2.
%
% Outputs:
%   last_episode - 1 x 7 vector describing the last episode T:
%       1: A_1     (price quoted by AMM 1)
%       2: A_2     (always a2)
%       3: opt_1   (greedy price of AMM 1)
%       4: opt_2   (always a2)
%       5: v_tilde (asset's fundamental value)
%       6: l       (trader's liquidity shock)
%       7: C_T     (length of the run of consecutive episodes ending at T,
%                   episode T included, in which AMM 1 quoted the same
%                   price as in T; AMM 2's price never changes. 1 <= C_T <= T)
%
%   all_episodes - T x 8 matrix with, for each episode t:
%       1: A_1        (price quoted by AMM 1)
%       2: opt_1      (greedy price of AMM 1)
%       3: Profit_1   (profit of AMM 1)
%       4: a_min      (lowest quoted price, min(A_1, a2))
%       5: A_1^2
%       6: opt_1^2
%       7: Profit_1^2
%       8: a_min^2
%
% Uses randsample, normrnd and binornd (Statistics and Machine Learning
% Toolbox).

%Pre-allocate
all_episodes = zeros(T,8);

%Number of prices on the grid and the offset such that
%price(index) = price_offset + index*tick, for index = 1..n_prices.
n_prices     = 2*grid_size+1;
price_offset = grid_middle - grid_size*tick - tick;

%Initialize the Q-vector of AMM 1 with uniform random values on [3,6]
%(intended to be higher than the monopoly expected profit). With the baseline
%parameters it has 139 rows (one for each price) and 1 column (for AMM 1).
Q_n = 3+(6-3)*rand(n_prices,1);

%Generate a vector of T observations with prob=0.5 to be vl and prob=0.5 to be vh.
v_tilde = randsample([vl, vh], T, true)';             %value of the asset in each episode
l  = normrnd(mu_l,sigma_l,T,1);                       %random draws of l in each episode
vc = v_tilde + l;                                     %investor valuation in each episode

%Experimentation probability in each episode
epsilon = exp(-beta*(1:1:T));

%T x 1 vector with 1 in row t if AMM 1 experiments at time t.
Experiment(:,1) = binornd(1,epsilon);

%Loop over all episodes.
for t = 1:T
    %Greedy price of AMM 1 in episode t (only AMM 1 is endogenous).
    %Collect every index attaining the maximum: irrelevant when the Q-vector
    %holds continuous random values, but needed if ties are possible.
    maxvector = find(Q_n(:,1) == max(Q_n(:,1)));
    s = maxvector(randi([1 length(maxvector)],1,1));   %randomize among multiple greedy prices
    all_episodes(t,2) = price_offset+(s*tick);         %greedy price of AMM 1 at time t

    %Price actually quoted by AMM 1: random if it experiments, greedy otherwise.
    if Experiment(t,1) == 1
        s = randi([1 n_prices],1,1);                   %random index in 1..n_prices
        all_episodes(t,1) = price_offset + tick*s;     %corresponding price quoted by AMM 1
    else
        all_episodes(t,1) = all_episodes(t,2);         %the greedy price is the quoted price
    end

    %Lowest quoted price across the two AMMs.
    all_episodes(t,4) = min(all_episodes(t,1),a2);

    %Profit of AMM 1 stays at the pre-allocated value of zero unless AMM 1
    %quotes the best price (possibly tied with a2) and the investor buys.
    if all_episodes(t,1) == all_episodes(t,4)          %AMM 1 quotes the best price
        %The investor buys if vc >= a_min.
        if all_episodes(t,4) <= vc(t)
            m = 2;                                     %tie with AMM 2: the trade is split
            %NOTE(review): the tie is detected by exact floating-point
            %comparison with a2; confirm a2 is generated on the same grid.
            if all_episodes(t,1) < a2                  %AMM 1 undercuts AMM 2
                m = 1;
            end
            all_episodes(t,3) = (all_episodes(t,1) - v_tilde(t)) / m;
        end
    end

    %Update the Q-value associated with the price actually played by AMM 1.
    Q_n(s,1) = alpha*all_episodes(t,3) + (1-alpha)*Q_n(s,1);

end %ends the loop on episodes t

%Count how many consecutive episodes, ending at T and including T, AMM 1
%has played the same price as in T (AMM 2 always plays the same price).
%The C_t < T guard stops the scan at the first episode, so a price that is
%constant over the whole run (or T == 1) no longer indexes row 0.
C_t = 1;
while C_t < T && all_episodes(end,1) == all_episodes(end-C_t,1)
    C_t = C_t+1;
end

%Compute the squared values of the variables in all_episodes:
all_episodes(:,5:8) = all_episodes(:,1:4).^2;
%Record the last episode
last_episode = [all_episodes(end,1), a2, all_episodes(end,2), a2, v_tilde(T,1), l(T,1), C_t];
